/*****************************************************
PROJECT:  BRIDGE C2
JOB:  Surveillance Reports
REQUESTOR: 
JIRA #: 
SITEIDS ALLOWED:  
ANALYST: Jean
DATE: 9/12/2023


PEER REVIEWER: 

DESCRIPTION: 

Input fil

*****************************************************/;
options compress=yes reuse=yes;
libname RD odbc noprompt="dsn=research_dev;Trusted_Connection=yes" bulkload=yes;
libname SCRAP "e:\\sasroot\BRIDGE C2\SCRAP\";

proc sql;
create table rd.jpo_BC_cohort as select* from scrap.scrap_BC_patients;
 
proc datasets library=reschdev;
delete jpo_BC_screens_lu;
quit;
proc sql; create table RD.jpo_BC_screens_lu as select * from scrap.BC_mammography_codes; quit;


*screening orders and results from order_proc;
proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table BC_orders as 
select * from connection to mycon (

SELECT distinct
	den.raw_patid
    ,op.ORDERING_DATE
	,op.result_time
	,op.proc_id
	,order_status_c
	,op.is_pending_ord_yn
	,op.order_proc_id
	,op.abnormal_yn
	,op.display_name
	,op.result_time 	
FROM
    research_dev.dbo.jpo_BC_cohort  AS den
		inner join Clarity.dbo.ORDER_PROC as op
			on den.raw_patid = op.pat_id 
        inner join research_dev.dbo.jpo_BC_screens_lu AS ppc
      ON op.PROC_ID = ppc.proc_id);
quit;
*3=resulted 5=completed;
*2=sent ;
* 4 canceled ;

data BC_orders2 ;
set BC_orders;
length source $20;
Source='Order proc';
format order_date date9.;
order_date=datepart(ordering_date);
where order_status_c^=4;
if order_date<'01JUL2011'd then delete;
if order_date>'29FEB2020'd then delete;
run;

data BC_screen_procs;
set BC_orders;
format screen_date date9.;
screen_date=datepart(result_time);
where result_time^=.;
if screen_date<'01JUL2011'd then delete;
if screen_date>'29FEB2020'd then delete;
run;


data ordered_screens;
set BC_screen_procs;
if screen_date<order_date-60 then delete;
run;



/*limit to orders in study period*/
/*join to birthdate and delete HIV at <30 y*/
/*birthdates in ADV would miss changed patid*/
proc sql;
create table orders3 as
select o.*,
p.index_visit,
p.age_at_index_visit
from BC_orders2 o left join scrap.BC_patient_covariates p
on o.raw_patid=p.raw_patid;
quit;

data orders4;
set orders3;
age_at_order=age_at_index_visit+yrdif(index_visit,order_date,'age');
where order_date>='01JUL2016'd;
run;*47.4-76.5proc means min max data=orders4;

data orders4;
set orders4;
screen_date=datepart(result_time);
if screen_date=. then screen_date=datepart(result_time0);
if age_at_order<50 then delete;
if age_at_order>=75 then delete;
run;




proc sql;
select count(order_proc_id), count(distinct order_proc_id) from orders4;
*deduplicate;
proc sql;
create table orders5  as
select raw_patid,
order_date,
order_proc_id,
max(screen_date) as screen_date format date9.,
count(order_proc_id) as records,
count(distinct source) as sources
from orders4
group by raw_patid,
order_date,
order_proc_id;


data scrap.BC_order_dates_new;
set orders5;
run;

proc datasets library=work;
save ordered_screens;
run;


/***********************************************************
merge to screen results
***********************************************************/

/*pull from surgical history*/
proc sql;
connect to odbc as mycon
(required = "dsn=research_dev;Trusted_Connection=yes");
create table BC_shx as 
select * from connection to mycon (
     SELECT den.raw_patid	
  	,shx.contact_date 
	,shx.surg_hx_start_dt
     ,shx.SURGICAL_HX_DATE 
     FROM research_dev.dbo.jpo_BC_cohort  AS den
     INNER JOIN clarity.dbo.SURGICAL_HX        AS shx
      ON den.raw_patID = shx.PAT_ID
     INNER JOIN research_dev.dbo.jpo_BC_screens_lu AS cpc
      ON shx.PROC_ID = cpc.proc_id)
	;      
quit;
/*only cases where the exact date of the surgical procedure is known*/*/;

data BC_SHX2 ;
	set BC_SHX;
	length source $20;
	Source='Surgical Hx';
	month=0;
	day=0;
	format screen_date date9.;
	if surg_hx_start_dt^=. then
		do;
			screen_date=datepart(surg_hx_start_dt);
			if screen_date>='01JUL2011'd and screen_date<='29FEB2020'd then 
				output BC_SHX2;
		end;
	if surg_hx_start_dt=. then
		do;
			if surgical_hx_date^=' ' and surgical_hx_date^='NO REPORT' then
				do;
					if index(surgical_hx_date,'/')^=0 then
						do;
							month=input (scan(surgical_hx_date,1,'/'),best4.);
							day=input (scan(surgical_hx_date,2,'/'),best4.);
							year=input (scan(surgical_hx_date,3,'/'),best4.);
						end;
					if index(surgical_hx_date,'-')^=0 then
						do;
							month=input (scan(surgical_hx_date,1,'-'),best4.);
							day=input (scan(surgical_hx_date,2,'-'),best4.);
							year=input (scan(surgical_hx_date,3,'-'),best4.);
						end;
					if month>0 and month<13 and
						day>0 and day<32 and
						year>2005 and year<2022 then
							do;
								screen_date=mdy(month, day, year);
							end;
			if screen_date>='01JUL2011'd and screen_date<='29FEB2020'd then 
				output BC_SHX2;
				end;
		end;
run;

proc datasets library=work;
delete BC_shx;
quit;

/*Screens from HM_history*/
proc sql;
connect to odbc as mycon
(required = "dsn=OCHIN_HCN_FENWAY;Trusted_Connection=yes");
create table HMT_hx as select *
from connection to mycon (
SELECT 
	p.raw_patid
    ,hmt.NAME as HMT_name
	,hmo.HM_TOPIC_ID
    ,hmo.HM_HX_Date
FROM 
	Clarity.dbo.HM_HISTORY hmo
		INNER JOIN research_dev.dbo.jpo_BC_cohort p
			ON hmo.PAT_ID = p.raw_PATID
		LEFT JOIN Clarity.dbo.CLARITY_HM_TOPIC hmt
			ON hmt.HM_TOPIC_ID = hmo.HM_TOPIC_ID
WHERE 
	
		( hmo.HM_TOPIC_ID IN (6)) 
	and hmo.HM_TYPE_C = 1 );


data HMT_HX2;
set HMT_HX;
length source $20 ;
format screen_date date9.;
screen_date=datepart(HM_HX_date);
Source='HMT HX';
run;

proc means nmiss data=ordered_screens;
var order_proc_id;
run;
/*combine screens with no order_proc_id;*/
data screens;
set BC_shx2 HMT_HX2;
run;

/*deduplicate*/
proc sql;
create table BC_screen_dates as
select raw_patid,
screen_date,
sum(source='HMT HX') as HMT_HX_records,
sum(source='Surgical Hx') as Surg_hx_records
from screens
group by raw_patid,
screen_date;


/*combine with prior pulled screen to ensure none are missing*/
data BC_screen_dates;
set BC_screen_dates scrap.BC_screen_dates;
run;

/*add age at screen_date*/
/*limit to orders in study period*/
/*join to birthdate and delete HIV at <30 y*/
/*birthdates in ADV would miss changed patid*/
proc sql;
create table screens2 as
select o.*,
p.index_visit,
p.age_at_index_visit
from BC_screen_dates o left join scrap.BC_patient_covariates p
on o.raw_patid=p.raw_patid;
quit;

data screens3;
set screens2;
age_at_screen=age_at_index_visit+yrdif(index_visit,screen_date,'age');
where screen_date>='01JUL2016'd and screen_date<='29FEB2020'd;
run;

data screens4;
set screens3;
if age_at_screen<50 then delete;
if age_at_screen>=75 then delete;
run;


/******************************************************
Flag completed orders
******************************************************/
/*link orders to observation end*/
proc sql;
create table BCorders2 as
select a.raw_patid,
a.first_due_date,
a.observation_end,
a.one_year_due,
o.order_proc_id, 
o.order_date
from scrap.BC_analysis a inner join 
scrap.BC_order_dates_new o
on a.raw_patid=o.raw_patid and 
o.order_date between a.first_due_date and a.one_year_due
order by raw_patid, order_date;

proc sql;
create table BC2 as
select o.*,
s.screen_date as result_date format date9.
from BCorders2 o left join screens4 s
on s.raw_patid=o.raw_patid and s.screen_date between o.order_date and o.observation_end;
proc sort data=BC2 out=BC3;
by raw_patid order_date result_date;
run;
data single multiple;
set BC3;
by raw_patid;
if first.raw_patid and last.raw_patid then output single;
if ^(first.raw_patid and last.raw_patid) then output single;
run;


/*combine screen_date from order_proc and result_dates from unlinked screens*/
data BC2;
set BC2;
format completion_date screen_date date9.;
completion_date=screen_date;
if result_date^=. then do;
	if screen_date=. or screen_date<order_date then completion_date=result_date;
	if ^(screen_date=. or screen_date<order_date) and 
		result_date<screen_date then completion_date=result_date;
end;
run;







/************************************************************
code new outcome variables, summarize by patient
documented_BC_1yr
completed_BC_1yr
***********************************************************/
data BC3;
set BC2;
documented_BC_1yr=1;
completed_BC=( completion_date^=. and 
	completion_date<=one_year_due);
run;


proc sql;
create table order_summary as
select raw_patid,
max(documented_BC_1yr) as documented_BC_1yr,
max(completed_BC) as completed_BC_1yr
from BC3
group by raw_patid
order by raw_patid;


proc sql;
create table analysis as
select a.*,
max(0,o.documented_BC_1yr) as documented_BC_1yr,
max(0,o.completed_BC_1yr) as completed_BC_1yr
from scrap.BC_ANALYSIS a left join order_summary o
on o.raw_patid=a.raw_patid;


data SCRAP.BC_analysis_09122023;
set analysis;
run;
/*manuscript check 2/21/2024*/
%let cancer=BC;
%Let covariates= race_ethnic2 language_c  FPL_cat2 age_cat payor_type_research visit_cat;
%Let class=delivery_site_id  age_cat race_ethnic2 language_c  FPL_cat2 visit_cat payor_type_research;
%let SDH=SDH_food; 

proc freq data=analysis;
tables &SDH;
run;
%let SDH=SDH_housing; 

proc freq data=analysis;
tables &SDH;
run;
%let SDH=SDH_trans; 

proc freq data=analysis;
tables &SDH;
run;


	proc genmod data=SCRAP.BC_analysis_09122023;
	class &class &sdh /ref=first;
	model due=&sdh &covariates/link=log d=binomial type3;
	repeated subject=delivery_site_id/type=exch;
	LSmeans &sdh/ilink CL om;
	ods output LSmeans=due_&sdh lsmestimates=due_diffs_&sdh;
	lsmestimate &sdh '1 vs 2' -1 0 1/e cl  om ilink;
	lsmestimate &sdh '3 vs 2' -1 1 0 /e cl   om ilink;
	run;
	proc genmod data=SCRAP.BC_analysis_09122023;
	class &class &sdh /ref=first;
	model months_utd=&sdh &covariates/link=log d=negbin type3 offset=months_offset;
	repeated subject=delivery_site_id/type=exch;
	LSmeans &sdh/ilink CL OM;
	ods output LSmeans=mUTD_&sdh lsmestimates=mUTD_diffs_&sdh;
	lsmestimate &sdh '1 vs 2' -1 0 1/cl  om ilink;
	lsmestimate &sdh '3 vs 2' -1 1 0 /cl   om ilink;
	run;
